Technological advances in metabolomics make it increasingly reliable to comprehensively detect and quantify small compounds in an unbiased manner. With the reduced costs come a wide range of applications, for example, in epidemiological, genetic and clinical studies. Meanwhile, integrated analyses of metabolomics with other omics, for example, gene expression transcriptomic datasets, are receiving the increasing attention, including but not limited to: graphite, Metscape, OmicsNet, PaintOmics and OmicsAnalyst. Most of these existing tools, however, only deal with individual pathways or can’t extract the subnetwork involving multiple pathways; in other words, they do not consider the network knowledge of gene-metabolite or metabolite-metabolite relations hidden in available metabolic pathways. Here we present “MNet”, an R package enabling network-based integration of metabolomics with clinical and transcriptomic data. | In this tutorial, we will present: | How to install MNet. | A quick start tutorial of MNet. | Detailed functionality description of MNet. | Two use cases using MNet. | A time-course use case using MNet. | Structure of MNet object.
The MNet R package requires R version 4.0.0 or higher.
ggcor and XGR are from GitHub. Hence, it is recommended to install the package before installing MNet.
devtools::install_github("Github-Yilei/ggcor")
devtools::install_github("hfang-bristol/XGR")
MNet is available for all operating systems and can be installed via the Github.
devtools::install_github("tuantuangui/MNet")
#load("result/metabo_dat.rda")
load("result/mexpr.rda")
#mexpr <- data.matrix(cexpr[,-1])
tumor_indices <- grep("TUMOR", colnames(mexpr))
normal_indices <- grep("NORMAL",colnames(mexpr))
group <- colnames(mexpr)
group[tumor_indices] <- "tumor"
group[normal_indices] <- "normal"
name <- c("C15973","C16254","MDH1")
result <- pathway_gene_metabolite(name)
ggsave("result/pathway_gene_metabolite.png",result$gp)
result$output
load("result/gene_dat.rda")
load("result/metabo_dat.rda")
group <- rep("normal",length(names(mexpr)))
group[grep("TUMOR",names(mexpr))] <- "tumor"
dat <- rbind(gene_dat,log(mexpr))
result <- mlimma(dat,group)
pdf("result/case1.pdnet.pdf")
pdnet(mexpr,gene_dat,result)
dev.off()
png("result/case1.pdnet.png",width = 8, height = 7, units = 'in', res = 200)
pdnet(mexpr,gene_dat,result,nsize=50)
dev.off()
the case2 dataset for metabolism is from Xiao et al(2022) and the transcriptome dataset is from Jiang et al(2019), we choose the patient that have metabolism data and transcriptome data.
load("TNBC/meta_data.rda")
load("TNBC/rna_data.rda")
group <- rep("normal",length(names(meta_data)))
group[grep("_T",names(meta_data))] <- "tumor"
dat <- rbind(rna_data,meta_data)
result <- mlimma(dat,group)
pdf("TNBC/case2.pdnet.pdf")
pdnet(meta_data,rna_data,result,nsize=50)
dev.off()
png("TNBC/case2.pdnet.png",width = 8, height = 7, units = 'in', res = 200)
pdnet(meta_data,rna_data,result,nsize=50)
dev.off()
Change the metabolite name to refmet name RefMet: A Reference list of Metabolite names The main objective of RefMet is to provide a standardized reference nomenclature for both discrete metabolite structures and metabolite species identified by spectroscopic techniques in metabolomics experiments.
refmetid_result <- name2refmet(rownames(mexpr))
write.table(refmetid_result,"result/refmetid_result.txt",quote=F,sep="\t",row.names=F)
search the kegg id corresponding to the metabolites name
keggid_result <- name2keggid(rownames(mexpr)) %>%
tidyr::separate_rows(kegg_id,sep=";")
write.table(keggid_result,"result/keggid_result.txt",quote=F,sep="\t",row.names=F)
search the kegg pathway corresponding to the metabolite name
result_all <- name2pathway(rownames(mexpr))
##### the output is the each metabolite related pathway
result_name2pathway <- result_all$name2pathway
write.table(result_name2pathway,"result/keggpathway_result.txt",quote=F,sep="\t",row.names=F)
keggpathway_result <- keggid2pathway(keggid_result$kegg_id)
head(keggpathway_result)
the PCA of the data
### the pca plot
out_dir <- "result"
p_PCA <- pPCA(mexpr,group)
ggplot2::ggsave(paste0(out_dir,"/1.PCA_1.pdf"),p_PCA$p1)
ggplot2::ggsave(paste0(out_dir,"/1.PCA_2.pdf"),p_PCA$p2)
ggplot2::ggsave(paste0(out_dir,"/1.PCA_3.pdf"),p_PCA$p3)
ggplot2::ggsave(paste0(out_dir,"/1.PCA_1.png"),p_PCA$p1)
ggplot2::ggsave(paste0(out_dir,"/1.PCA_2.png"),p_PCA$p2)
ggplot2::ggsave(paste0(out_dir,"/1.PCA_3.png"),p_PCA$p3)
using the function differential_metabolites in my R packages “MNet”
diff_result <- DM(mexpr,group)
dev.off()
write.table(diff_result,"result/DM_result.txt",quote=F,row.names=F,sep="\t")
## filter the differential metabolites by default fold change >1.5 or < 1/1.5 ,fdr < 0.05 and VIP>1
diff_result_filter <- diff_result %>%
dplyr::filter(fold_change >1.3 | fold_change < 1/1.3) %>%
dplyr::filter(fdr_wilcox<0.1) %>%
dplyr::filter(vip>0.8)
utils::write.table(diff_result,paste0(out_dir,"/2.all_TumorvsNormal.txt"),quote=F,row.names=F,sep="\t")
utils::write.table(diff_result_filter,paste0(out_dir,"/2.diff_TumorvsNormal.txt"),quote=F,row.names=F,sep="\t")
library(dplyr)
out_dir="result"
df <- data.table::fread(paste0("result","/2.diff_TumorvsNormal.txt")) %>%
as.data.frame()
df %>% DT::datatable(options=list(pageLength=5,searchHighlight=T,buttons=c('csv','copy'), dom='Bt',scrollX=T,fixedColumns=list(leftColumns=1)), style='default', caption="", rownames=FALSE, escape=F, extensions=c('Buttons','FixedColumns'))
the volcano plot of metabolites using the function “pVolcano” in the package “MNet”
p_volcano <- pVolcano(diff_result,foldchange=1.5)
#p_volcano
ggplot2::ggsave(paste0(out_dir,"/3.volcano.pdf"),p_volcano)
ggplot2::ggsave(paste0(out_dir,"/3.volcano.png"),p_volcano)
the heatmap plot of differentital metabolites using the function “plot_heatmap” in our package “MNet”
mexpr_diff <- mexpr[rownames(mexpr) %in% diff_result_filter$name,]
p_heatmap <- pHeatmap(mexpr_diff,group,fontsize_row=5,fontsize_col=4,clustering_method="complete",clustering_distance_cols="euclidean")
ggplot2::ggsave(paste0(out_dir,"/3.heatmap.pdf"),p_heatmap,width=10,height=8)
ggplot2::ggsave(paste0(out_dir,"/3.heatmap.png"),p_heatmap,width=10,height=8)
the zscore plot of differentital metabolites using the function “plot_zscore” in our package “MNet”
p_zscore <- pZscore(mexpr_diff,group)
#p_zscore
ggplot2::ggsave(paste0(out_dir,"/3.z_score.pdf"),p_zscore,width=5,height=5)
ggplot2::ggsave(paste0(out_dir,"/3.z_score.png"),p_zscore,width=5,height=5)
using logistic model to select the feature
mexpr_t <- mexpr_diff %>%
t() %>%
data.frame() %>%
dplyr::mutate(group=group)
mexpr1 <- mexpr_t[which(mexpr_t$group==unique(group)[1]),]
mexpr2 <- mexpr_t[which(mexpr_t$group==unique(group)[2]),]
train_sub1 = sample(nrow(mexpr1),8/10*nrow(mexpr1))
train_sub2 = sample(nrow(mexpr2),8/10*nrow(mexpr2))
train_data = rbind(mexpr1[train_sub1,],mexpr2[train_sub2,])
test_data <- rbind(mexpr1[-train_sub1,],mexpr2[-train_sub2,])
#train_sub = sample(nrow(mexpr_t),8/10*nrow(mexpr_t))
#train_data <- mexpr_t[train_sub,]
#test_data <- mexpr_t[-train_sub,]
ML_logic(train_data,test_data,out_dir=paste0(out_dir,"/4.machine_learning_logic/"))
using random forest to do the features selection
mexpr_t <- as.data.frame(t(mexpr))
mexpr_t$group <- group
result <- ML_RF(mexpr_t)
ggsave("result/machine_learning_RF_AUC.pdf",result$p)
ggsave("result/machine_learning_RF_AUC.png",result$p)
the differential abundance (DA) score analysis
## 4.1 the differential metabolites' DA score
metabolites_kegg_id_temp <- name2keggid(diff_result$name)
metabolites_kegg_id <- metabolites_kegg_id_temp %>%
dplyr::distinct(name,.keep_all=TRUE) %>%
dplyr::left_join(diff_result,by="name") %>%
tidyr::separate_rows(kegg_id,sep=";")
utils::write.table(metabolites_kegg_id,paste0(out_dir,"/2.all_TumorvsNormal_kegg_id.txt"),quote=F,row.names=F,sep="\t")
increase_metabolites <- metabolites_kegg_id %>%
dplyr::filter(fold_change>3/2 ) %>%
dplyr::filter(fdr_wilcox<0.05) %>%
dplyr::filter(vip>1) %>%
dplyr::filter(!is.na(kegg_id)) %>%
dplyr::pull(kegg_id)
decrease_metabolites <- metabolites_kegg_id %>%
dplyr::filter(fold_change<2/3) %>%
dplyr::filter(fdr_wilcox<0.05) %>%
dplyr::filter(vip>1) %>%
dplyr::filter(!is.na(kegg_id)) %>%
dplyr::pull(kegg_id)
all_metabolites <- metabolites_kegg_id %>%
dplyr::filter(!is.na(kegg_id)) %>%
dplyr::pull(kegg_id)
DA_result <- DAscore(increase_metabolites,decrease_metabolites,all_metabolites,min_measured_num = 0,sort_plot = "classification")
#DA_result$result
#DA_result$p
ggplot2::ggsave(paste0(out_dir,"/5.DA_score.pdf"),DA_result$p,width=10,height=8)
ggplot2::ggsave(paste0(out_dir,"/5.DA_score.png"),DA_result$p,width=10,height=8)
utils::write.table(DA_result$result,paste0(out_dir,"/5.DA_score.txt"),quote=F,row.names=F,sep="\t")
## 4.2 pathway
result_all <- name2pathway(diff_result_filter$name)
##### the output is the each metabolite related pathway
result_name2pathway <- result_all$name2pathway
##### the related pathway and the metabolites in the pathway
result_pathway <- result_all$pathway
##### the metabolites and its kegg id
kegg_name <- result_all$kegg_name
utils::write.table(result_name2pathway,paste0(out_dir,"/6.diff_name2pathway.txt"),quote=F,row.names=F,sep="\t")
utils::write.table(result_pathway,paste0(out_dir,"/6.diff_pathway_info.txt"),quote=F,row.names=F,sep="\t")
kegg_pathway_filter <- kegg_pathway %>%
dplyr::filter(!is.na(pathway_type)) %>%
dplyr::select(c("ENTRY","PATHWAY"))
kegg_id_need <- c(increase_metabolites,decrease_metabolites)
xgr_output <- xgr(kegg_id_need,kegg_pathway_filter)
#xgr_result <- xgr_output$output
#xgr_output$gp
ggplot2::ggsave(paste0(out_dir,"/6.diff_pathway_xgr.pdf"),xgr_output$gp)
ggplot2::ggsave(paste0(out_dir,"/6.diff_pathway_xgr.png"),xgr_output$gp)
dir.create("result/pathview")
setwd("result/pathview")
tt <- diff_result %>%
dplyr::filter(name %in% kegg_id_need)
name <- tt %>%
dplyr::pull(name)
value <- tt %>%
dplyr::mutate(value=log2(fold_change)) %>%
dplyr::pull(value)
names(value) <- name
pPathview(cpd.data=value)
the time series analysis using mFuzz
TSMfuzz(mexpr,out_dir="result/mfuzz",range=c(4,12))
the time series using supraHex
TSSupraHex(mexpr,newdata=NULL,out_dir="result/supraHex/")
the time series analysis of clinical biomarker
time_series_ALT <- pCliTS(clinical_index,"ALT")
ggsave("result/clinical_time_series.pdf",time_series_ALT)
the correlation analysis bewteen metabolites
dat_cor <- cor(t(mexpr))
pdf("result/correlation_metabolism.pdf")
corrplot::corrplot(dat_cor,type = "upper",order="hclust",tl.cex=0.5)
dev.off()
the clinical biomarkers correlation analysis
clinical_cor <- cor(clinical)
pdf("result/correlation_clinical.pdf")
corrplot::corrplot(clinical_cor, order = "hclust",hclust.method = "ward.D2",type = "upper")
dev.off()
mexpr1 <- t(mexpr)
clinical_data <- as.data.frame(t(clinical))
metabolite_data <- as.data.frame(mexpr1)
p <- plot_correlation_clinical_metabolite_mantel(clinical_data,metabolite_data)
ggsave("result/correlation_metabolites_clinical.pdf",p)
cor_result <- data.frame()
dir.create("result/correlation/",recursive =TRUE)
for (i in 1:ncol(clinical)) {
for (j in 1:nrow(mexpr)) {
cor_temp <- cor.test(as.numeric(clinical[,i]),log2(as.numeric(mexpr[j,])))
cor_p <- cor_temp$p.value
cor_cor <- cor_temp$estimate
cor_result_temp <- data.frame(metabolites=rownames(mexpr)[j],clinical=colnames(clinical)[i],cor=cor_cor,p=cor_p)
cor_result <- rbind(cor_result,cor_result_temp)
}
}
write.table(cor_result,"result/correlation/correlation_metabolites_clinical.txt",quote=F,row.names=F,sep="\t")
cor_result_filter <- cor_result %>%
dplyr::filter(p<0.05)
for (i in 1:nrow(cor_result_filter)) {
mexpr_filter <- mexpr[which(rownames(mexpr)==cor_result_filter$metabolites[i]),]
clinical_filter <- clinical[,which(colnames(clinical)==cor_result_filter$clinical[i])]
dat <- as.data.frame(cbind(log2(t(mexpr_filter)),clinical_filter))
colnames(dat) <- c("metabolites","clinical")
p <- ggpubr::ggscatter(dat, x = "metabolites", y = "clinical",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab =paste0("the log2 value of ",cor_result_filter$metabolites[i]), ylab = paste0("the value of ",cor_result_filter$clinical[i]))
clinical_name = gsub("[/]","",cor_result_filter$clinical[i])
ggsave(paste0("result/correlation/",clinical_name,"_",cor_result_filter$metabolites[i],".pdf"),p)
}
p <- plot_correlation_clinical_metabolite(clinical,mexpr,"CRP","1-Methyladenosine")
ggsave("result/correlation/CRP_1-Methyladenosine.pdf",p)
the survival analysis
p <- survCli(clinical_survival)
pdf("result/survival_OS.pdf")
p$p_OS
dev.off()
pdf("result/survival_RFS.pdf")
ggsave("result/survival_RFS.pdf",p$p_RFS)
dev.off()
pdf("result/survival_EFS.pdf")
ggsave("result/survival_EFS.pdf",p$p_EFS)
dev.off()
automatic classify the samples by each selected metabolites by mean or median, and then plot the survival
metabolites <- c("MT.ND4","MT.ND4L","MT.ND6")
survMet(dat,metabolites,cluster_method="mean",out_dir="survival/metabolites/")
the cox analysis about clinical
result <- MetCox(dat)
write.table(result,"result/clinical_cox.txt",quote=F,sep="\t",row.names = F)
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.29 R6_2.5.1 jsonlite_1.8.0 magrittr_2.0.3
## [5] evaluate_0.16 stringi_1.7.8 cachem_1.0.6 rlang_1.0.4
## [9] cli_3.3.0 rstudioapi_0.13 jquerylib_0.1.4 bslib_0.4.0
## [13] rmarkdown_2.14 tools_4.2.1 stringr_1.4.0 xfun_0.32
## [17] yaml_2.3.5 fastmap_1.1.0 compiler_4.2.1 htmltools_0.5.3
## [21] knitr_1.39 sass_0.4.2